{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7775dd48",
   "metadata": {},
   "source": [
    "# RedNote Collect Fans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b148366f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import uiautomator2 as u2\n",
    "import pandas as pd\n",
    "import random\n",
    "import time\n",
    "import csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "332bb095",
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_sleep(min_sec=6.0, max_sec=9.0):\n",
    "    \"\"\"\n",
    "    在 min_sec 到 max_sec 之间随机 sleep 一段时间。\n",
    "    用于模拟人类行为，避免被检测为自动脚本。\n",
    "    \"\"\"\n",
    "    delay = random.uniform(min_sec, max_sec)\n",
    "    time.sleep(delay)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d3940124",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ========== 配置 ==========\n",
    "HOST_ID = \"597139594\"\n",
    "CSV_FILE = f\"{HOST_ID}.csv\"\n",
    "MAX_SCROLL = 10  # 最多滑动次数（避免死循环）\n",
    "WAIT = 2  # 每一步的等待时间（可调节）\n",
    "\n",
    "# ========== 初始化 ==========\n",
    "d = u2.connect()\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1702392f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ========== 启动小红书 ==========\n",
    "d.app_start(\"com.xingin.xhs\")\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "319fd8ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ========== 搜索博主 ==========\n",
    "# 点击搜索框（你需要用XPath或坐标定位）\n",
    "d.xpath(\"//*[@resource-id='com.xingin.xhs:id/search']\").click()\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b09d0438",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 输入博主ID\n",
    "for ch in HOST_ID:\n",
    "    d.send_keys(ch)\n",
    "    time.sleep(0.2)  # 模拟人类打字速度\n",
    "time.sleep(WAIT)\n",
    "d.xpath('//*[@text=\"搜索\"]').click()\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e1ac4f00",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击第一个搜索结果\n",
    "d.xpath('//*[@resource-id=\"com.xingin.xhs:id/mOneBoxUserTvUsername\"]').click()\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "081c1d38",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ========== 进入主页并点击“粉丝” ==========\n",
    "# d.xpath('//*[@resource-id=\"com.xingin.xhs:id/fansLabel\"]').click()\n",
    "d.xpath('//*[@resource-id=\"com.xingin.xhs:id/followLabel\"]').click()\n",
    "time.sleep(WAIT)\n",
    "d.xpath('//*[@resource-id=\"com.xingin.xhs:id/tabView\" and @content-desc=\"粉丝\"]').click()\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "cc668b17",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ========== 若有“查看全部”，则点击 ==========\n",
    "if d.xpath('//*[@text=\"查看全部\"]').exists:\n",
    "    d.xpath('//*[@text=\"查看全部\"]').click()\n",
    "    time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "10d35df6",
   "metadata": {},
   "outputs": [],
   "source": [
    "followers = d.xpath('//android.view.View').all()\n",
    "fan = followers[0] if followers else None\n",
    "fan.click()\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "09122c42",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5668538238\n"
     ]
    }
   ],
   "source": [
    "fan_id = d.xpath('//*[@resource-id=\"com.xingin.xhs:id/profile_new_page_avatar_card_redid\"]').get_text()\n",
    "fan_id = fan_id.split(\"：\")[1].strip()\n",
    "print(fan_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "a7891965",
   "metadata": {},
   "outputs": [],
   "source": [
    "d.press(\"back\")\n",
    "time.sleep(WAIT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed0611ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "小红书号：5668538238\n",
      "小红书号：9548272995\n",
      "小红书号：495103028\n",
      "小红书号：383018987\n",
      "小红书号：2265196810\n",
      "小红书号：1802405326\n",
      "小红书号：7423146591\n",
      "小红书号：3493313533\n"
     ]
    }
   ],
   "source": [
    "for fan in followers:\n",
    "    fan.click()\n",
    "    random_sleep()\n",
    "    fan_id = d.xpath('//*[@resource-id=\"com.xingin.xhs:id/profile_new_page_avatar_card_redid\"]').get_text()\n",
    "    fan_id = fan_id.split(\"：\")[1].strip()\n",
    "    print(fan_id)\n",
    "    d.press(\"back\")\n",
    "    random_sleep()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "5406d8f8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d(scrollable=True).scroll.vert.forward(steps=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "6d5d4dda",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1080, 2340)"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.window_size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a871b321",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "start_x = random.randint(450, 550)\n",
    "start_y = random.randint(1600, 1800)\n",
    "end_x = start_x + random.randint(-10, 10)\n",
    "end_y = random.randint(400, 550)\n",
    "duration = random.uniform(0.3, 0.5)\n",
    "d.swipe(start_x, start_y, end_x, end_y, duration)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "1c15655c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.swipe(450, 1800, 455, 400, duration)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "89ced2ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_xhs_follower_ids(user_id, max_scroll=10):\n",
    "    \"\"\"\n",
    "    获取小红书用户的粉丝ID列表。\n",
    "    \n",
    "    :param user_id: 小红书用户ID\n",
    "    :param max_scroll: 最大滑动次数，防止死循环\n",
    "    :return: 粉丝ID列表\n",
    "    \"\"\"\n",
    "    csv_file = f\"{user_id}.csv\"\n",
    "    # ========== 初始化 ==========\n",
    "    d = u2.connect()\n",
    "    random_sleep(2, 3)\n",
    "    \n",
    "    # ========== 启动小红书 ==========\n",
    "    d.app_start(\"com.xingin.xhs\")\n",
    "    random_sleep(2, 3)\n",
    "    \n",
    "    # ========== 搜索博主 ==========\n",
    "    # 点击搜索框\n",
    "    d.xpath(\"//*[@resource-id='com.xingin.xhs:id/search']\").click()\n",
    "    random_sleep(1, 2)\n",
    "    # 输入博主ID\n",
    "    for ch in user_id:\n",
    "        d.send_keys(ch)\n",
    "        time.sleep(0.2)  # 模拟人类打字速度\n",
    "    random_sleep(1, 2)\n",
    "    # 点击搜索\n",
    "    d.xpath('//*[@text=\"搜索\"]').click()\n",
    "    random_sleep(2, 3)\n",
    "    # 点击第一个搜索结果\n",
    "    d.xpath('//*[@resource-id=\"com.xingin.xhs:id/mOneBoxUserTvUsername\"]').click()\n",
    "    random_sleep(4, 5)\n",
    "    \n",
    "    # ========== 进入主页并打开粉丝列表 ==========\n",
    "    d.xpath('//*[@resource-id=\"com.xingin.xhs:id/fansLabel\"]').click()\n",
    "    random_sleep(3, 5)\n",
    "    # d.xpath('//*[@resource-id=\"com.xingin.xhs:id/followLabel\"]').click()\n",
    "    # random_sleep(2, 3)\n",
    "    # d.xpath('//*[@resource-id=\"com.xingin.xhs:id/tabView\" and @content-desc=\"粉丝\"]').click()\n",
    "    # random_sleep(2, 3)\n",
    "    \n",
    "    # ========== 若有“查看全部”，则点击 ==========\n",
    "    if d.xpath('//*[@text=\"查看全部\"]').exists:\n",
    "        d.xpath('//*[@text=\"查看全部\"]').click()\n",
    "        random_sleep(2, 3)\n",
    "    \n",
    "    # ========== 获取粉丝列表 ==========\n",
    "    scrolls = 0\n",
    "    seen = set()\n",
    "    fan_ids = []\n",
    "    last_count = 0\n",
    "\n",
    "    for i in range(max_scroll):\n",
    "        # 滚动次数\n",
    "        if scrolls % 3 == 0 and scrolls > 0:\n",
    "            print(f\"已滚动 {scrolls} 次，当前收集到 {len(fan_ids)} 个粉丝ID\")\n",
    "            random_sleep(200, 300)\n",
    "        # ========== 逐个粉丝节点获取 ==========\n",
    "        followers = d.xpath('//android.view.View').all()\n",
    "        for fan in followers:\n",
    "            fan.click()\n",
    "            random_sleep()\n",
    "            fan_id = d.xpath('//*[@resource-id=\"com.xingin.xhs:id/profile_new_page_avatar_card_redid\"]').get_text()\n",
    "            fan_id = fan_id.split(\"：\")[1].strip()\n",
    "            if fan_id not in seen:\n",
    "                print(fan_id)\n",
    "                fan_ids.append(fan_id)\n",
    "                seen.add(fan_id)\n",
    "            d.press(\"back\")\n",
    "            random_sleep()\n",
    "        \n",
    "        # 若本轮无新增，则提前退出\n",
    "        if len(fan_ids) == last_count:\n",
    "            print(\"本轮无新增粉丝ID，提前退出\")\n",
    "            break\n",
    "        last_count = len(fan_ids)\n",
    "\n",
    "        # 向下滑动加载更多\n",
    "        # d(scrollable=True).scroll.vert.forward(steps=3)\n",
    "        # scrolls += 1\n",
    "        # random_sleep(2, 3)\n",
    "        start_x = random.randint(450, 550)\n",
    "        start_y = random.randint(1600, 1800)\n",
    "        end_x = start_x + random.randint(-10, 10)\n",
    "        end_y = random.randint(400, 550)\n",
    "        duration = random.uniform(0.3, 0.5)\n",
    "        d.swipe(start_x, start_y, end_x, end_y, duration)\n",
    "        scrolls += 1\n",
    "        random_sleep(2, 3)\n",
    "    \n",
    "    # ========== 一次性保存为 CSV ==========\n",
    "    df = pd.DataFrame({'粉丝ID': fan_ids})\n",
    "    df.to_csv(csv_file, index=False, encoding='utf-8-sig')\n",
    "    \n",
    "    return fan_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "8a7e655e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2600560844\n",
      "390054317\n",
      "903497803\n",
      "666114480\n",
      "836855785\n",
      "993784135\n",
      "971751346\n",
      "553918356\n",
      "556578149\n",
      "531493563\n",
      "ismine_xuan\n",
      "187185651\n",
      "493747899\n",
      "264095836\n",
      "null\n",
      "116164543\n",
      "952714153\n",
      "已滚动 3 次，当前收集到 17 个粉丝ID\n",
      "本轮无新增粉丝ID，提前退出\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['2600560844',\n",
       " '390054317',\n",
       " '903497803',\n",
       " '666114480',\n",
       " '836855785',\n",
       " '993784135',\n",
       " '971751346',\n",
       " '553918356',\n",
       " '556578149',\n",
       " '531493563',\n",
       " 'ismine_xuan',\n",
       " '187185651',\n",
       " '493747899',\n",
       " '264095836',\n",
       " 'null',\n",
       " '116164543',\n",
       " '952714153']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_xhs_follower_ids(user_id=\"114765256\", max_scroll=10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "da-env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
